import wave
import sys
import json
from vosk import Model, KaldiRecognizer, SetLogLevel
# Configure Vosk's log level once at import time, before any model is loaded.
# NOTE(review): -1 presumably silences the Kaldi log output - confirm against the Vosk docs.
SetLogLevel(-1)
def get_text(wav_path: str = "test.wav", model_path: str = "model_cn_small") -> str:
    """Transcribe a WAV file with Vosk and return the recognized text.

    The audio must be mono, use 2-byte (16-bit) samples and be
    uncompressed; otherwise a message is printed and the process exits
    with status 1.

    Args:
        wav_path: Path of the WAV file to transcribe.
        model_path: Path handed to ``Model`` to load the Vosk model.

    Returns:
        The text of every recognized segment concatenated together, with
        all space characters removed.

    Raises:
        SystemExit: If the audio is not mono, 16-bit and uncompressed.
    """
    # The context manager closes the file on every exit path, including
    # the sys.exit() below and any error raised while loading the model
    # or running recognition.
    with wave.open(wav_path, "rb") as wf:
        unsupported = (
            wf.getnchannels() != 1
            or wf.getsampwidth() != 2
            or wf.getcomptype() != "NONE"
        )
        if unsupported:
            print("音频格式为wav单声道文件")
            sys.exit(1)

        model = Model(model_path)
        rec = KaldiRecognizer(model, wf.getframerate())
        rec.SetWords(True)

        segments = []
        while True:
            data = wf.readframes(4000)
            if not data:
                # readframes() returned nothing: end of the audio data.
                break
            # A truthy return means a result is ready to be collected.
            if rec.AcceptWaveform(data):
                segments.append(json.loads(rec.Result()).get("text", ""))

        # Collect whatever the recognizer still holds after the last chunk.
        segments.append(json.loads(rec.FinalResult()).get("text", ""))

    # Every space is stripped from the output, both the separators between
    # segments and any spaces inside a segment's text.
    return "".join(segments).replace(" ", "")

